package com.ztesoft.test.service;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} for {@code yinchuan.58.com}.
 *
 * <p>Pages whose URL matches {@link #URL_LIST} are treated as listing pages: links to
 * posts (matching {@link #URL_POST}) and to further listing pages are queued for
 * crawling. Every other page is treated as a post page, and the contents of its
 * {@code div.mainTitle > h1} element are stored under the result field {@code "title"}.
 *
 * @author code4crafter@gmail.com <br>
 */
public class Info58Processor implements PageProcessor {

	/**
	 * Regular expression identifying listing-page URLs.
	 *
	 * <p>NOTE(review): the trailing {@code .} matches any single character, so at least
	 * one character is expected after {@code /baomu/}. Confirm this is intended before
	 * tightening or loosening the pattern.
	 */
	public static final String URL_LIST = "http://yinchuan\\.58\\.com/jiefdj/baomu/.";

	/**
	 * Regular expression identifying post-page URLs.
	 *
	 * <p>NOTE(review): the {@code ?} after {@code shtml} is unescaped, so it is a regex
	 * quantifier that makes the preceding {@code l} optional; it is not a literal
	 * question mark. Presumably a literal {@code ?} (start of the query string) was
	 * meant, but "fixing" it changes which part of each link is extracted, so the
	 * value is left as-is until that is confirmed.
	 */
	public static final String URL_POST = "http://yinchuan\\.58\\.com/baomu/\\w+\\.shtml?\\w+";

	/** User-Agent header value sent with every request. */
	private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) "
			+ "AppleWebKit/537.31 (KHTML, like Gecko) "
			+ "Chrome/26.0.1410.65 Safari/537.31";

	/**
	 * Seed URL used by {@link #main(String[])}.
	 *
	 * <p>NOTE(review): the query string contains unencoded non-ASCII characters and
	 * parameter names prefixed with {@code amp}, which looks like residue of an
	 * HTML-escaped {@code &amp;}. Kept byte-for-byte; confirm against the live site
	 * before cleaning it up.
	 */
	private static final String START_URL = "http://yinchuan.58.com/jiefdj/baomu/"
			+ "?key=月嫂&ampcmcskey=月嫂&ampfinal=1&ampspecialtype=gls&nearby=jiefdj&PGTID=163286953188173290720248650&ClickID=1";

	/** Crawl settings: target domain, delay between requests (3000 ms) and User-Agent. */
	private final Site site = Site.me()
			.setDomain("58.com")
			.setSleepTime(3000)
			.setUserAgent(USER_AGENT);

	/**
	 * Processes one downloaded page.
	 *
	 * <p>Listing pages only contribute new requests. For any other page the title is
	 * extracted and stored with {@code page.putField("title", ...)} so that the
	 * pipelines attached to the spider receive it; the value is {@code null} when the
	 * title element is not found.
	 *
	 * @param page the downloaded page handed over by the spider
	 */
	@Override
	public void process(Page page) {
		// Listing page: queue posts and further listing pages, nothing to extract.
		if (page.getUrl().regex(URL_LIST).match()) {
			// Post links are only taken from the table with id "jingzhun".
			page.addTargetRequests(
					page.getHtml().xpath("//table[@id=\"jingzhun\"]").links().regex(URL_POST).all());
			// Further listing pages may be linked from anywhere on the page.
			page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
			return;
		}

		// Post page: publish the title as a result field instead of printing it.
		String title = page.getHtml().xpath("//div[@class='mainTitle']/h1").toString();
		page.putField("title", title);
	}

	/**
	 * Returns the crawl settings used for this processor.
	 *
	 * @return the {@link Site} configured for {@code 58.com}
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Starts a spider with this processor, seeded with {@link #START_URL}, and runs it
	 * on the calling thread.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		Spider.create(new Info58Processor()).addUrl(START_URL).run();
	}
}